suppressPackageStartupMessages({
library(SummarizedExperiment)
library(SEtools)
library(ggplot2)
library(plgINS)
library(edgeR)
})# change names
colnames(se.splicing) <- sapply(colnames(se.splicing), function(x) rev(unlist(strsplit(x,"\\.")))[1] )
# modify colData
se.splicing$cell <- factor(c(rep("ESC",6),rep("iN",5)))
se.splicing$miRNA <- factor(c(rep("ctrl",3),rep("miR-379/410",3),rep("ctrl",3),rep("miR-379/410",2)))
se.splicing$rep <- factor(c(rep(LETTERS[1:3],3), "B","C"))
se.splicing$isRepA <- factor(se.splicing$rep=="A")
# filter raw data
filter <- c(10,2)
se.splicing <- se.splicing[which(rowSums(assay(se.splicing) >= filter[1]) >= filter[2]),]#' DEAprep
#'
#' Generates control samples out of the average values over all supplied samples, combines spliced and unspliced
#' assays and calculates their logCPM & log2FC data.
#' All is assembled into one SE object.
#'
#'
#' @param se SummarizedExperiment object containing assays of raw counts of spliced & unspliced tx
#'
#' @return an SE object of combined spliced and unspliced data as well as control samples generated from averaging over all treatment samples
#'
DEAprep <- function(se, control){
# allocation
readtype <- list(spliced="spliced",unspliced="unspliced")
## generate DGEList object
dds1 <- DGEList(assays(se)$spliced)
dds2 <- DGEList(assays(se)$unspliced)
## combine the spliced & unspliced assays
dds <- cbind(dds1, dds2)
# generate colData
dd1 <- colData(se)[,c("rep","miRNA","cell","isRepA")]
dd1$readtype <- readtype$spliced
## duplicate dd to have data for combined spliced & unspliced assay
dd <- rbind(dd1, dd1)
dd$readtype[(nrow(dd1)+1):nrow(dd)] <- readtype$unspliced
## rename both dds & dd object features
n.cols <- lapply(readtype, function(x) sapply(colnames(se), function(y) paste(y, x, sep=".") ))
colnames(dds) <- c(n.cols$spliced, n.cols$unspliced)
rownames(dd) <- c(n.cols$spliced, n.cols$unspliced)
# rebuild SE object
## SE object with logCPM & logFC assays
se <- SummarizedExperiment(assays=list(counts=dds$counts),
rowData=rowData(se),
colData=dd)
assays(se)$logcpm <- log1p(cpm(calcNormFactors(DGEList(assay(se)))))
return(se)
}#' exonDEA
#'
#' @param se SE object containing assays with combined spliced & unspliced counts
#' @param model function: model matrix function to test
#' @param model0 function: model matrix function to be tested against
#' @param control string: name of control samples inside colData() of supplied SE object
#'
#' @return SE object containing DEA results over all treatments & over individual ones
#'
exonDEA <- function(se, model, model0=~1, control){
## allocation
se$miRNA <- relevel(droplevels(se$miRNA), ref=control)
se$readtype <- relevel(factor(se$readtype), ref="unspliced")
se$rep <- droplevels(se$rep)
dd <- colData(se)
## normalization
dds <- calcNormFactors(DGEList(assays(se)$counts))
## models
mm <- model.matrix(model, data=dd)
mm0 <- model.matrix(model0, data=dd)
testCoeff <- setdiff(colnames(mm), colnames(mm0))
## estimate dispersion
dds <- estimateDisp(dds,mm)
## fit negative binomial distribution on counts per gene (use glmFit for few replicates)
fit <- glmFit(dds, mm)
## fit a GLM on the data
lrt.comb <- glmLRT(fit, testCoeff)
## top genes that change relative to stage 0
res.comb <- as.data.frame(topTags(lrt.comb, Inf))
## fit linear model dropping one sample at a time (using multiple cores)
res.list <- bplapply( testCoeff, BPPARAM=MulticoreParam(8), FUN=function(x){
as.data.frame(topTags(glmLRT(fit, x),Inf))
})
dea.names <- gsub("readtype","", testCoeff)
dea.names <- make.names(gsub(":miRNA",".", dea.names))
colnames(res.comb)[grepl("logFC",colnames(res.comb))] <- paste0("logFC.", dea.names)
names(res.list) <- paste0("DEA.",dea.names)
## add DEAs
rowData(se)$DEA.spliced.all <- DataFrame(res.comb[rownames(se),])
for(i in paste0("DEA.",dea.names)){
rowData(se)[[i]] <- DataFrame(res.list[[i]][rownames(se),])
}
## add logFC assay
se <- plgINS::svacor(se, form = model, form0 = model0)
se <- SEtools::log2FC(se, controls = se$miRNA==control, by = se$readtype, fromAssay = "corrected", isLog = TRUE)
return(se)
}# combined readtype:miRNA effect
se.esc.comb <- exonDEA(se.esc, model = ~readtype*miRNA, model0 = ~readtype+miRNA, control="ctrl")
se.in.comb <- exonDEA(se.in, model = ~readtype*miRNA, model0 = ~readtype+miRNA, control="ctrl")# number of significant tx
## combined ESC [model = ~readtype*miRNA, model0 = ~readtype+miRNA]
sapply(rowData(se.esc.comb)[,grepl("DEA",colnames(rowData(se.esc.comb)))], function(x) sum(x$FDR < .05) )## DEA.spliced.all DEA.spliced.miR.379.410
## 4 4
## combined iN [model = ~readtype*miRNA, model0 = ~readtype+miRNA]
sapply(rowData(se.in.comb)[,grepl("DEA",colnames(rowData(se.in.comb)))], function(x) sum(x$FDR < .05) )## DEA.spliced.all DEA.spliced.miR.379.410
## 217 217
# batch effect included (rep)
se.esc.batch <- exonDEA(se.esc, model = ~rep+readtype*miRNA, model0 = ~rep+readtype+miRNA, "ctrl")
se.in.batch <- exonDEA(se.in, model = ~rep+readtype*miRNA, model0 = ~rep+readtype+miRNA, "ctrl")# number of significant tx
## batch ESC [model = ~rep+readtype*miRNA, model0 = ~rep+readtype+miRNA]
sapply(rowData(se.esc.batch)[,grepl("DEA",colnames(rowData(se.esc.batch)))], function(x) sum(x$FDR < .05) )## DEA.spliced.all DEA.spliced.miR.379.410
## 13 13
## batch iN [model = ~rep+readtype*miRNA, model0 = ~rep+readtype+miRNA]
sapply(rowData(se.in.batch)[,grepl("DEA",colnames(rowData(se.in.batch)))], function(x) sum(x$FDR < .05) )## DEA.spliced.all DEA.spliced.miR.379.410
## 579 579
# batch effect included (isRepA)
se.esc.batchA <- exonDEA(se.esc, model = ~isRepA+readtype*miRNA, model0 = ~isRepA+readtype+miRNA, "ctrl")
se.in.batchA <- exonDEA(se.in, model = ~isRepA+readtype*miRNA, model0 = ~isRepA+readtype+miRNA, "ctrl")# number of significant tx
## batchA ESC [model = ~isRepA+readtype*miRNA, model0 = ~isRepA+readtype+miRNA]
sapply(rowData(se.esc.batchA)[,grepl("DEA",colnames(rowData(se.esc.batchA)))], function(x) sum(x$FDR < .05) )## DEA.spliced.all DEA.spliced.miR.379.410
## 13 13
## batchA iN [model = ~isRepA+readtype*miRNA, model0 = ~isRepA+readtype+miRNA]
sapply(rowData(se.in.batchA)[,grepl("DEA",colnames(rowData(se.in.batchA)))], function(x) sum(x$FDR < .05) )## DEA.spliced.all DEA.spliced.miR.379.410
## 460 460
# heatmap function
makeHM <- function(se, sig, nr=20, logcpm=FALSE){
for(i in sig){
if(sum(rowData(se)$DEA.spliced.all$FDR < i) > nr){
cat("FDR <", i, "\n")
# logFC heatmap
SEtools::sehm(se[,order(colData(se)$miRNA)], row.names(se)[rowData(se)$DEA.spliced.all$FDR < i], gaps_at = "readtype",
breaks=TRUE, do.scale = FALSE, show_colnames = FALSE, assayName = "log2FC", anno_columns = c("miRNA","readtype"))
# logcpm heatmap
se.sp <- se[,colData(se)$readtype=="spliced"]
if(logcpm){
SEtools::sehm(se.sp[,order(colData(se.sp)$miRNA)], row.names(se.sp)[rowData(se.sp)$DEA.spliced.all$FDR < i], gaps_at = "readtype",
breaks=TRUE, do.scale = TRUE, show_colnames = FALSE, assayName = "logcpm", anno_columns = c("miRNA","readtype"))
}
break
}
}
}# heatmaps of DEA.spliced.all
# combined ESC [model = ~readtype*miRNA, model0 = ~readtype+miRNA]
makeHM(se.esc.comb, sig.lvl)## FDR < 0.5
## FDR < 0.005
## batch ESC [model = ~rep+readtype*miRNA, model0 = ~rep+readtype+miRNA]
makeHM(se.esc.batch, sig.lvl)## FDR < 0.1
## batch iN [model = ~rep+readtype*miRNA, model0 = ~rep+readtype+miRNA]
makeHM(se.in.batch, sig.lvl)## FDR < 0.005
# spliced v unspliced ESC
lib.spliced <- apply(assays(se.esc.comb[,colData(se.esc.comb)$readtype=="spliced"])$counts, 2, function(x) sum(x))
lib.unspliced <- apply(assays(se.esc.comb[,colData(se.esc.comb)$readtype=="unspliced"])$counts, 2, function(x) sum(x))
median(lib.unspliced / lib.spliced)## [1] 0.58242
# spliced v unspliced iN
lib.spliced <- apply(assays(se.in.comb[,colData(se.in.comb)$readtype=="spliced"])$counts, 2, function(x) sum(x))
lib.unspliced <- apply(assays(se.in.comb[,colData(se.in.comb)$readtype=="unspliced"])$counts, 2, function(x) sum(x))
median(lib.unspliced / lib.spliced)## [1] 0.6334839
# check number of positive & negative logFC for different significances
sigsDF <- function(se, sig, dea, thr){
data.frame(sigLevel=rep(sig,3),
counts=c(sapply(sig, function(x) sum(sign(assays(se[rowData(se)[[dea]]$FDR < x,])$log2FC) == -1 &
abs(assays(se[rowData(se)[[dea]]$FDR < x,])$log2FC) >= thr)),
sapply(sig, function(x) sum(sign(assays(se[rowData(se)[[dea]]$FDR < x,])$log2FC) == 0 &
abs(assays(se[rowData(se)[[dea]]$FDR < x,])$log2FC) >= thr)),
sapply(sig, function(x) sum(sign(assays(se[rowData(se)[[dea]]$FDR < x,])$log2FC) == 1 &
abs(assays(se[rowData(se)[[dea]]$FDR < x,])$log2FC) >= thr))
),
sign=c(rep("-1",length(sig)),
rep("0",length(sig)),
rep("1",length(sig))
),
dea=rep(dea,length(sig)*3)
)
}# for each model & DEA: find number of down- & upregulations at different significance levels
## significance levels of interest
sig <- c(1e-10, 1e-5, .05, .1, .5, .8)
## only absolute log2FC greater than this will be considered
fc.thr <- .5# create dataframes with count informations
## combined ESC [model = ~readtype*miRNA, model0 = ~readtype+miRNA]
sigs.esc.comb <- lapply(grep("DEA",colnames(rowData(se.esc.comb)),value=TRUE), function(x){
sigsDF(se.esc.comb, sig, x, fc.thr)
})
sigs.esc.comb <- data.frame(do.call("rbind",sigs.esc.comb))
## combined iN [model = ~readtype*miRNA, model0 = ~readtype+miRNA]
sigs.in.comb <- lapply(grep("DEA",colnames(rowData(se.in.comb)),value=TRUE), function(x){
sigsDF(se.in.comb, sig, x, fc.thr)
})
sigs.in.comb <- data.frame(do.call("rbind",sigs.in.comb))# create dataframes with count informations
## batch ESC [model = ~rep+readtype*miRNA, model0 = ~rep+readtype+miRNA]
sigs.esc.batch <- lapply(grep("DEA",colnames(rowData(se.esc.batch)),value=TRUE), function(x){
sigsDF(se.esc.batch, sig, x, fc.thr)
})
sigs.esc.batch <- data.frame(do.call("rbind",sigs.esc.batch))
## batch iN [model = ~rep+readtype*miRNA, model0 = ~rep+readtype+miRNA]
sigs.in.batch <- lapply(grep("DEA",colnames(rowData(se.in.batch)),value=TRUE), function(x){
sigsDF(se.in.batch, sig, x, fc.thr)
})
sigs.in.batch <- data.frame(do.call("rbind",sigs.in.batch))# create dataframes with count informations
## batchA ESC [model = ~isRepA+readtype*miRNA, model0 = ~isRepA+readtype+miRNA]
sigs.esc.batchA <- lapply(grep("DEA",colnames(rowData(se.esc.batchA)),value=TRUE), function(x){
sigsDF(se.esc.batchA, sig, x, fc.thr)
})
sigs.esc.batchA <- data.frame(do.call("rbind",sigs.esc.batchA))
## batchA iN [model = ~isRepA+readtype*miRNA, model0 = ~isRepA+readtype+miRNA]
sigs.in.batchA <- lapply(grep("DEA",colnames(rowData(se.in.batchA)),value=TRUE), function(x){
sigsDF(se.in.batchA, sig, x, fc.thr)
})
sigs.in.batchA <- data.frame(do.call("rbind",sigs.in.batchA))# combined ESC [model = ~readtype*miRNA, model0 = ~readtype+miRNA]
ggplot(sigs.esc.comb, aes(x=as.factor(sigLevel), y=counts, fill=sign)) + geom_bar(position="dodge", stat="identity") + facet_wrap(~dea)# combined iN [model = ~readtype*miRNA, model0 = ~readtype+miRNA]
ggplot(sigs.in.comb, aes(x=as.factor(sigLevel), y=counts, fill=sign)) + geom_bar(position="dodge", stat="identity") + facet_wrap(~dea)# batch ESC [model = ~rep+readtype*miRNA, model0 = ~rep+readtype+miRNA]
ggplot(sigs.esc.batch, aes(x=as.factor(sigLevel), y=counts, fill=sign)) + geom_bar(position="dodge", stat="identity") + facet_wrap(~dea)# batch iN [model = ~rep+readtype*miRNA, model0 = ~rep+readtype+miRNA]
ggplot(sigs.in.batch, aes(x=as.factor(sigLevel), y=counts, fill=sign)) + geom_bar(position="dodge", stat="identity") + facet_wrap(~dea)